#!/usr/bin/python
# coding:utf-8
import urlManager
import html_downloader
import html_parser
class spider_main(object):
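    """Spider entry point: wires the URL manager, downloader and parser together."""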

    def __init__(self):
        self.urls = urlManager.urlManager()
        self.downloader = html_downloader.htmlDownloader()
        self.parser = html_parser.htmlParser()

    def craw(self, root_url):
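        """Crawl pages starting from root_url until the URL manager runs out of new URLs."""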
        self.urls.add_new_url(root_url)
        while self.urls.has_new_url():
            try:
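                # take the next pending URL, download its HTML and parse it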
                url = self.urls.get_new_url()
                html_cont = self.downloader.download(url)
                result = self.parser.parse(url, html_cont)
                print(result)

            except Exception as e:
                print("craw failed: %s" % e)

if __name__ == "__main__":
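    # Seed URLs: Baidu Baike entry pages; only one is enabled, the rest are commented out.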
    urlArr = [
        # "https://baike.baidu.com/item/%E4%B8%AD%E5%A4%AE%E7%94%B5%E5%AD%90%E7%90%86%E5%B7%A5%E5%AD%A6%E9%99%A2",
        "https://baike.baidu.com/item/%E5%A4%A9%E6%99%AE%E5%A4%A7%E5%AD%A6",
        # "https://baike.baidu.com/item/%E5%9C%A3%E5%BD%BC%E5%BE%97%E5%A0%A1%E5%9B%BD%E7%AB%8B%E5%BD%B1%E8%A7%86%E5%A4%A7%E5%AD%A6",
        # "https://baike.baidu.com/item/%E5%9C%A3%E5%BD%BC%E5%BE%97%E5%A0%A1%E5%9B%BD%E7%AB%8B%E5%86%9C%E4%B8%9A%E5%A4%A7%E5%AD%A6",
        # "https://baike.baidu.com/item/%E8%8A%9D%E5%8A%A0%E5%93%A5%E6%B4%9B%E7%BA%A6%E6%8B%89%E5%A4%A7%E5%AD%A6",
        # "https://baike.baidu.com/item/%E9%A9%AC%E9%87%8C%E5%85%B0%E5%A4%A7%E5%AD%A6%E5%B7%B4%E5%B0%94%E7%9A%84%E6%91%A9%E5%88%86%E6%A0%A1",
    ]
    # root_url = "https://baike.baidu.com/item/%E5%A4%A9%E6%99%AE%E5%A4%A7%E5%AD%A6"
    # root_url = "https://baike.baidu.com/item/%E7%BB%B4%E6%8B%89%E8%AF%BA%E7%93%A6%E5%A4%A7%E5%AD%A6"
    objSpider = spider_main()
    for url in urlArr:
        objSpider.craw(url)
